{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "import pandas as pd\n",
    "import os\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>entity</th>\n",
       "      <th>cpa</th>\n",
       "      <th>closest_entity</th>\n",
       "      <th>time</th>\n",
       "      <th>run</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5.476739</td>\n",
       "      <td>2</td>\n",
       "      <td>52.3</td>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>5.476739</td>\n",
       "      <td>1</td>\n",
       "      <td>52.3</td>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1.919017</td>\n",
       "      <td>2</td>\n",
       "      <td>5.4</td>\n",
       "      <td>86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>1.919017</td>\n",
       "      <td>1</td>\n",
       "      <td>5.4</td>\n",
       "      <td>86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>4.505857</td>\n",
       "      <td>2</td>\n",
       "      <td>10.4</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   entity       cpa  closest_entity  time  run\n",
       "0       1  5.476739               2  52.3   77\n",
       "1       2  5.476739               1  52.3   77\n",
       "2       1  1.919017               2   5.4   86\n",
       "3       2  1.919017               1   5.4   86\n",
       "4       1  4.505857               2  10.4    9"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "log_dir = (os.path.join(os.path.expanduser('~'), '.scrimmage', 'experiments', 'my_first_parameter_varying'))\n",
    "files = glob.glob(os.path.join(log_dir, '*_job_*', 'cpa.csv'))\n",
    "\n",
    "agg = pd.DataFrame()\n",
    "for file in files:\n",
    "    run_num = int((os.path.basename(os.path.dirname(file))).split('_')[-1])\n",
    "    frame = pd.read_csv(file)\n",
    "    frame['run'] = run_num\n",
    "    agg = pd.concat([agg, frame], copy=False)\n",
    "agg = agg.reset_index(drop=True)\n",
    "agg.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MS_gain</th>\n",
       "      <th>max_speed</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>run</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.733505</td>\n",
       "      <td>24.813339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.497387</td>\n",
       "      <td>22.070393</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.515396</td>\n",
       "      <td>19.717730</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.053652</td>\n",
       "      <td>17.737459</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1.957513</td>\n",
       "      <td>20.121153</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      MS_gain  max_speed\n",
       "run                     \n",
       "1    1.733505  24.813339\n",
       "2    0.497387  22.070393\n",
       "3    0.515396  19.717730\n",
       "4    0.053652  17.737459\n",
       "5    1.957513  20.121153"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "params_agg = pd.read_csv(os.path.join(log_dir, 'batch_params.csv'), index_col='run')\n",
    "params_agg.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>entity</th>\n",
       "      <th>cpa</th>\n",
       "      <th>closest_entity</th>\n",
       "      <th>time</th>\n",
       "      <th>run</th>\n",
       "      <th>MS_gain</th>\n",
       "      <th>max_speed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5.476739</td>\n",
       "      <td>2</td>\n",
       "      <td>52.3</td>\n",
       "      <td>77</td>\n",
       "      <td>1.805475</td>\n",
       "      <td>20.063098</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>5.476739</td>\n",
       "      <td>1</td>\n",
       "      <td>52.3</td>\n",
       "      <td>77</td>\n",
       "      <td>1.805475</td>\n",
       "      <td>20.063098</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1.919017</td>\n",
       "      <td>2</td>\n",
       "      <td>5.4</td>\n",
       "      <td>86</td>\n",
       "      <td>0.336285</td>\n",
       "      <td>24.769219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>1.919017</td>\n",
       "      <td>1</td>\n",
       "      <td>5.4</td>\n",
       "      <td>86</td>\n",
       "      <td>0.336285</td>\n",
       "      <td>24.769219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>4.505857</td>\n",
       "      <td>2</td>\n",
       "      <td>10.4</td>\n",
       "      <td>9</td>\n",
       "      <td>1.370620</td>\n",
       "      <td>16.866877</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   entity       cpa  closest_entity  time  run   MS_gain  max_speed\n",
       "0       1  5.476739               2  52.3   77  1.805475  20.063098\n",
       "1       2  5.476739               1  52.3   77  1.805475  20.063098\n",
       "2       1  1.919017               2   5.4   86  0.336285  24.769219\n",
       "3       2  1.919017               1   5.4   86  0.336285  24.769219\n",
       "4       1  4.505857               2  10.4    9  1.370620  16.866877"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = agg.join(params_agg, on='run')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>entity</th>\n",
       "      <th>cpa</th>\n",
       "      <th>closest_entity</th>\n",
       "      <th>time</th>\n",
       "      <th>run</th>\n",
       "      <th>MS_gain</th>\n",
       "      <th>max_speed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>5.476739</td>\n",
       "      <td>2</td>\n",
       "      <td>52.3</td>\n",
       "      <td>77</td>\n",
       "      <td>1.805475</td>\n",
       "      <td>20.063098</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1.919017</td>\n",
       "      <td>2</td>\n",
       "      <td>5.4</td>\n",
       "      <td>86</td>\n",
       "      <td>0.336285</td>\n",
       "      <td>24.769219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>4.505857</td>\n",
       "      <td>2</td>\n",
       "      <td>10.4</td>\n",
       "      <td>9</td>\n",
       "      <td>1.370620</td>\n",
       "      <td>16.866877</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>4.180133</td>\n",
       "      <td>2</td>\n",
       "      <td>23.9</td>\n",
       "      <td>84</td>\n",
       "      <td>1.346520</td>\n",
       "      <td>22.231913</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1</td>\n",
       "      <td>2.104974</td>\n",
       "      <td>2</td>\n",
       "      <td>9.2</td>\n",
       "      <td>100</td>\n",
       "      <td>0.669359</td>\n",
       "      <td>17.816883</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   entity       cpa  closest_entity  time  run   MS_gain  max_speed\n",
       "0       1  5.476739               2  52.3   77  1.805475  20.063098\n",
       "2       1  1.919017               2   5.4   86  0.336285  24.769219\n",
       "4       1  4.505857               2  10.4    9  1.370620  16.866877\n",
       "6       1  4.180133               2  23.9   84  1.346520  22.231913\n",
       "8       1  2.104974               2   9.2  100  0.669359  17.816883"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "entity1 = data[::2]\n",
    "entity1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "entity1.to_csv(os.path.join(log_dir, 'entity_1_data.csv'), index_label='index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>           <td>cpa</td>       <th>  R-squared:         </th> <td>   0.972</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.972</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   1730.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Wed, 05 Dec 2018</td> <th>  Prob (F-statistic):</th> <td>3.65e-77</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>14:49:16</td>     <th>  Log-Likelihood:    </th> <td> -95.970</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>   100</td>      <th>  AIC:               </th> <td>   195.9</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>    98</td>      <th>  BIC:               </th> <td>   201.2</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     2</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>MS_gain</th>   <td>    2.6621</td> <td>    0.109</td> <td>   24.399</td> <td> 0.000</td> <td>    2.446</td> <td>    2.879</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>max_speed</th> <td>    0.0380</td> <td>    0.006</td> <td>    6.095</td> <td> 0.000</td> <td>    0.026</td> <td>    0.050</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>136.337</td> <th>  Durbin-Watson:     </th> <td>   2.117</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.000</td>  <th>  Jarque-Bera (JB):  </th> <td>4337.281</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td> 4.736</td>  <th>  Prob(JB):          </th> <td>    0.00</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td>33.842</td>  <th>  Cond. No.          </th> <td>    34.6</td>\n",
       "</tr>\n",
       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                    cpa   R-squared:                       0.972\n",
       "Model:                            OLS   Adj. R-squared:                  0.972\n",
       "Method:                 Least Squares   F-statistic:                     1730.\n",
       "Date:                Wed, 05 Dec 2018   Prob (F-statistic):           3.65e-77\n",
       "Time:                        14:49:16   Log-Likelihood:                -95.970\n",
       "No. Observations:                 100   AIC:                             195.9\n",
       "Df Residuals:                      98   BIC:                             201.2\n",
       "Df Model:                           2                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "MS_gain        2.6621      0.109     24.399      0.000       2.446       2.879\n",
       "max_speed      0.0380      0.006      6.095      0.000       0.026       0.050\n",
       "==============================================================================\n",
       "Omnibus:                      136.337   Durbin-Watson:                   2.117\n",
       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):             4337.281\n",
       "Skew:                           4.736   Prob(JB):                         0.00\n",
       "Kurtosis:                      33.842   Cond. No.                         34.6\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "\"\"\""
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Basic statistical analysis\n",
    "X = entity1[[\"MS_gain\", 'max_speed']]\n",
    "y = entity1['cpa']\n",
    "\n",
    "model = sm.OLS(y, X).fit()\n",
    "predictions = model.predict(X)\n",
    "\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
